import nltk
from nltk.probability import FreqDist
from nucular import Nucular

# Filesystem location of the Nucular archive opened by getResults().
SIMPLE_ARCHIVE = 'c:/users/vaidhy/Development/scipy/archive/simple'
# Module-level frequency distribution; findDistribution() clears and refills
# it on every call, so it always reflects the most recent query only.
fd = FreqDist()

def getResults(w):
    """Query the archive at SIMPLE_ARCHIVE for *w* and return the matches.

    Opens the Nucular archive, builds a query restricted with ``anyWord(w)``
    and returns whatever ``resultDictionaries()`` yields for that query.
    """
    query = Nucular.Nucular(SIMPLE_ARCHIVE).Query()
    query.anyWord(w)
    return query.resultDictionaries()

def findDistribution(w, headCount=10, tailStart=4620):
    """Print token-frequency statistics for archive entries matching *w*.

    Resets the module-level ``fd`` distribution, splits the ``'content'``
    field of every result returned by ``getResults(w)`` into sentences and
    then into word tokens, and counts each token in ``fd``.  Afterwards it
    prints the total token count, the number of distinct tokens, and two
    slices of ``fd.keys()`` together with their counts.

    w         -- search word passed through to getResults().
    headCount -- number of keys printed from the start of fd.keys()
                 (default 10, the value that used to be hard-coded).
    tailStart -- index into fd.keys() from which all remaining keys are
                 printed (default 4620, the value that used to be
                 hard-coded).

    NOTE(review): slicing fd.keys() requires it to return a list, and the
    two slices are presumably meant as "most frequent" / "least frequent"
    samples -- confirm the key ordering against the installed NLTK version.
    """
    global fd
    fd.clear()
    for d in getResults(w):
        for sentence in nltk.sent_tokenize(d['content']):
            # Counting is a side effect, so use a plain loop rather than a
            # list comprehension that builds and discards a list of Nones.
            for token in nltk.word_tokenize(sentence):
                fd.inc(token)
    print("Count : " + str(fd.N()))
    print("Uniques : " + str(fd.B()))
    # Fetch the keys once; fd is not modified between the two listings.
    keys = fd.keys()
    for word in keys[:headCount]:
        print("%s %s" % (word, fd[word]))
    for word in keys[tailStart:]:
        print("%s %s" % (word, fd[word]))

        
# Script entry point: print the token distribution for a sample search word.
if __name__ == "__main__":   
    findDistribution('parseltongue')
